* Master Compile Do File
* Tax Farming Redux

/*********************************************************
	OVERVIEW
	This do file is the first step in the data cleaning / processing pipeline. It takes
	data from its raw fundamental form and cleans and compiles it into something that can
	be combined with treatment and other analysis variables.
	
	This do file assumes that data is stored in the following folder structure:
	
	* PAKTAX_ANALYSIS
	*	- Data
			- Cleaned Data
				- [Outputs from the files below are stored in sub-directories here]
			- Do Files
				- [Do files that are called below are located in sub-directories here]
			- Raw Data
				- [Inputs for the files below are stored in sub-directories here]
	* PAKTAX_RANDOMIZATION (set in this file to PAKTAX_ANALYSIS/Data/Randomization)
		- [Results of randomization are stored in some directory here]

* Last edited: June 2015, Kunal Mangal
***********************************************************/

* Start from a clean session and disable the -more- pause so the whole pipeline
* runs without waiting for a key press.
clear all
set more off

/*******************************************************************************
* Set-up:
*******************************************************************************/

/*	Set up the directory global. This will be shared across do files. Just add your 
	directory below to get set up (one "cap cd" line per machine).

	"cap" swallows the error raised when a directory does not exist on this machine,
	so a failed cd leaves the working directory unchanged instead of stopping the
	run. The globals below are then built from whatever the working directory is. */


cap cd "C:\Users\alawther\Dropbox (MIT)\Pakistan\Public-Alyssa\"

global PAKTAX_ANALYSIS = "`c(pwd)'"
global PAKTAX_RANDOMIZATION = "${PAKTAX_ANALYSIS}/Data/Randomization"

/*	Fail fast when the working directory is not the project root. Without this
	check a failed cd goes unnoticed until the first sub do file cannot be found.
	The file tested here is the first do file this master file runs, so the check
	does not reject any set-up that would otherwise have worked. */
capture confirm file "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Administrative Data/Crosswalk/Make Keys.do"
if _rc != 0 {
	display as error "Project root not found: PAKTAX_ANALYSIS is ${PAKTAX_ANALYSIS}"
	display as error "Add a cap cd line for your machine in the Set-up section of this do file."
	exit 601
}

/*	Analysis window: the most recent fiscal year and quarter covered.
	Do not change these -- the sub do files read both globals. */
global ynow 2013
global qnow 4
	
/*******************************************************************************
****	TAX DATA
*******************************************************************************/

* Crosswalking
* --------------------------------

* Crosswalking : Circle boundaries change regularly, which means the raw data doesn't give us a panel
*				 that's comparable over time. We construct a synthetic panel through "crosswalking"
*				 by using ratios of total net demand to determine counterfactual demand/recovery assuming
*				 circles maintained their original boundaries as of the time of randomization.


* Key File
// Make the key files, which encode the robust partitions for each quarter. Crosswalking is done within
// robust partitions, and both crosswalk steps below list the Keys folder as an input, so this step
// has to run before them.

	// INPUTS: Data/Raw Data/Administrative Data/Crosswalk/ (These files encode which circles merge into which)
	// OUTPUTS: Data/Cleaned Data/Administrative Data/Crosswalk/Keys (These files encode the robust partitions)
	
	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Administrative Data/Crosswalk/Make Keys.do"

* Crosswalk Data to Analysis
	// Runs the "Crosswalk to Present - Analysis" do file once; no arguments are passed.
	// NOTE(review): judging by the output folder names, this maps the quarterly circle data onto
	// the present-day circle definitions -- confirm in the called do file.

	// INPUTS: Data/Cleaned Data/Administrative Data/Crosswalk/Keys -- for robust partitions
	//		   Data/Cleaned Data/Administrative Data/Quarterly -- for circle data
	
	// OUTPUTS: Data/Cleaned Data/Administrative Data/Crosswalk/Transition Matrices/To Present
	//			Data/Cleaned Data/Administrative Data/Crosswalk/Quarterly/To Present - Analysis
	
	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Administrative Data/Crosswalk/Crosswalk to Present - Analysis.do"

* Crosswalk Data to Randomization
	// INPUTS: Data/Cleaned Data/Administrative Data/Crosswalk/Keys -- for robust partitions
	//		   Data/Cleaned Data/Administrative Data/Quarterly -- for circle data

	// OUTPUTS: Data/Cleaned Data/Administrative Data/Crosswalk/Transition Matrices/To Randomization
	//			Data/Cleaned Data/Administrative Data/Crosswalk/Quarterly/To Randomization

	// Run the crosswalk once per fiscal quarter, starting at 2012 Q1 and ending at the
	// latest analysis quarter held in the globals ynow / qnow. The year and the quarter
	// are handed to the called do file as its two arguments.
	local ynow = ${ynow}
	local qnow = ${qnow}

	forvalues yr = 2012/`ynow' {
		// Years before the final one run quarters 1-4; the final year stops at qnow
		// (never beyond quarter 4).
		local stop_qtr = cond(`yr' < `ynow', 4, min(4, `qnow'))
		forvalues qtr = 1/`stop_qtr' {
			do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Administrative Data/Crosswalk/Crosswalk to Randomization.do" `yr' `qtr'
		}
	}
	
* Compile datasets
	// Runs the two compile do files in turn: current circles first, then randomization circles.
	// NOTE(review): presumably each one writes the matching .dta file listed below -- confirm
	// in the called do files.
	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Administrative Data/Compile Current Circles.do"
	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Administrative Data/Compile Randomization Circles.do"
	* --> Creates Data/Cleaned Data/Administrative Data/Punjab Crosswalked - Current Circles.dta and
	*			  Data/Cleaned Data/Administrative Data/Punjab Crosswalked - Randomization Circles.dta
		
/*******************************************************************************
****	RANDOMIZATION
*******************************************************************************/

** Cleans up Phase II Randomization
	// NOTE(review): the do file is named "Clean Year 1 Randomization" while its inputs and
	// output are labelled Phase II -- presumably the same randomization round; confirm.
	// INPUTS: Data/Randomization/Phase II/110809 Randomization Output - Additional Circles 999 - Copy
	//		   Data/Randomization/Phase II/110810 7861 Subreatment Randomization - Copy
	// OUTPUTS: Data/Cleaned Data/Randomization/121116 Cleaned Phase II Randomization.dta

	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Randomization/121116 Clean Year 1 Randomization.do"

** Combines and cleans up randomizations
	// Takes the cleaned Phase II file written by the previous step as an input, so it must
	// run after that step.
	// NOTE(review): the do file is named "Clean Year 2 Randomization" while its output is
	// labelled Phase III -- presumably the same randomization round; confirm.
	// INPUTS: Data/Cleaned Data/Randomization/121116 Cleaned Phase II Randomization.dta
	//		   Data/Randomization/Phase III/120707 Randomization Output 7214.dta
	//		   Data/Randomization/ETO & AETO/120707 ETO Randomization Output 6465 - New List - Randomization Circles.dta
	//		   Data/Randomization/ETO & AETO/120707 ETO Randomization Output 6465.dta
	// OUTPUTS: Data/Cleaned Data/Randomization/130506 Cleaned Phase III Randomization.dta
	
	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Randomization/130506 Clean Year 2 Randomization.do"

/*******************************************************************************
****	SURVEY DATA
*******************************************************************************/	
	
* Round 3 Survey Cleaning
	// This is the survey data used for analysis. It was conducted in all study circles.
	// Due to logistics, the survey was split into two phases. The 1st phase ran from
	// May - June/July, and the 2nd phase ran from August-December.

	// Step 1: run "Unencrypt and compile.do" on the raw Round 3 files.
	// (Folder contents are written as "(all files)" rather than with a trailing wildcard,
	// so that no comment contains the character pair that opens a block comment.)
	// INPUTS : Data/Raw Data/Survey Data/Survey Round 3/ (all files)
	//			Data/Raw Data/Survey Data/130520 Circle Selection for Phase III Survey - All Circles.dta
	// OUTPUTS: Data/Cleaned Data/Field Survey/Round 3/Compiled/ (all files)
	
	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Survey Data/Round 3/Unencrypt and compile.do"
	
	
	// Step 2: run "survey_round3_cleaning.do" on the compiled files produced by step 1.
	// INPUTS : Data/Cleaned Data/Field Survey/Round 3/Compiled/ (all files)
	// OUTPUTS : Data/Cleaned Data/Field Survey/Round 3/survey_cleaned_data.dta

	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Survey Data/Round 3/survey_round3_cleaning.do"
	* -- Note that TaxLiabilityCalc and Imputed Tax - Corrected.do are called by this do file

/*******************************************************************************
****	INSPECTOR SURVEY
*******************************************************************************/

* Inspector Survey Cleaning

// We conducted a phone survey with inspectors to capture
// how their behavior had changed in response to the scheme.
// NOTE(review): unlike the sections above, no INPUTS / OUTPUTS are listed for this step --
// see the called do file for the files it reads and writes.

	do "${PAKTAX_ANALYSIS}/Data/Do Files/Cleaning/Inspector Survey/131008 Inspector Survey Cleaning.do"
